Github source for data https://github.com/nytimes/covid-19-data/tree/master/mask-use
#Source for data
url <- "https://github.com/nytimes/covid-19-data/raw/master/mask-use/mask-use-by-county.csv"
nyt_mask_survey <- read_csv(here::here("data", "nyt_mask_survey.csv"))
nyt_mask_survey <- nyt_mask_survey %>%
clean_names() %>%
mutate(
mostly_yes= frequently+always,
mostly_no = never+rarely,
delta = mostly_yes-mostly_no
)
glimpse(nyt_mask_survey)
## Rows: 3,142
## Columns: 9
## $ countyfp <chr> "01001", "01003", "01005", "01007", "01009", "01011"…
## $ never <dbl> 0.053, 0.083, 0.067, 0.020, 0.053, 0.031, 0.102, 0.1…
## $ rarely <dbl> 0.074, 0.059, 0.121, 0.034, 0.114, 0.040, 0.053, 0.1…
## $ sometimes <dbl> 0.134, 0.098, 0.120, 0.096, 0.180, 0.144, 0.257, 0.1…
## $ frequently <dbl> 0.295, 0.323, 0.201, 0.278, 0.194, 0.286, 0.137, 0.1…
## $ always <dbl> 0.444, 0.436, 0.491, 0.572, 0.459, 0.500, 0.451, 0.4…
## $ mostly_yes <dbl> 0.739, 0.759, 0.692, 0.850, 0.653, 0.786, 0.588, 0.6…
## $ mostly_no <dbl> 0.127, 0.142, 0.188, 0.054, 0.167, 0.071, 0.155, 0.2…
## $ delta <dbl> 0.612, 0.617, 0.504, 0.796, 0.486, 0.715, 0.433, 0.3…
The FIPS code is a federal code that numbers states and territories of the US. It extends to the county level with an additional four digits, so every county in the US has a unique six-digit identifier, where the first two digits represent the state.
We will be using Kieran Healy’s socviz package which among other things contains county_map and county_data
# America’s choropleths; use county_map that has all polygons
# and county data with demographics/election data from socviz datafile
# The id field is the FIPS code for the county
county_map %>%
sample_n(5)
## long lat order hole piece group id
## 1 57111 -244795 110862 FALSE 1 0500000US31103.1 31103
## 2 1423522 -669990 66797 FALSE 1 0500000US21129.1 21129
## 3 1259604 -680561 63644 FALSE 1 0500000US21029.1 21029
## 4 275597 -267007 110332 FALSE 1 0500000US31051.1 31051
## 5 27728 -137268 148481 FALSE 1 0500000US46085.1 46085
county_data %>%
sample_n(5)
## id name state census_region pop_dens pop_dens4
## 1 46115 Spink County SD Midwest [ 0, 10) [ 0, 17)
## 2 55085 Oneida County WI Midwest [ 10, 50) [ 17, 45)
## 3 21235 Whitley County KY South [ 50, 100) [ 45, 118)
## 4 05029 Conway County AR South [ 10, 50) [ 17, 45)
## 5 26137 Otsego County MI Midwest [ 10, 50) [ 45, 118)
## pop_dens6 pct_black pop female white black travel_time land_area
## 1 [ 0, 9) [ 0.0, 2.0) 6598 49.6 96.7 0.7 15.8 1504
## 2 [ 25, 45) [ 0.0, 2.0) 35563 50.1 96.6 0.5 19.2 1113
## 3 [ 45, 82) [ 0.0, 2.0) 35503 51.0 97.1 0.8 19.7 438
## 4 [ 25, 45) [10.0,15.0) 21083 51.0 85.1 11.4 22.2 552
## 5 [ 45, 82) [ 0.0, 2.0) 24158 50.8 96.4 0.5 20.5 515
## hh_income su_gun4 su_gun6 fips votes_dem_2016 votes_gop_2016
## 1 48911 [ 0, 5) [ 0, 4) 46115 919 1854
## 2 45759 [ 8,11) [ 8,10) 55085 8103 11677
## 3 29769 [11,54] [12,54] 21235 2067 11312
## 4 35225 [11,54] [10,12) 5029 2655 4844
## 5 47584 [ 8,11) [ 8,10) 26137 3556 8266
## total_votes_2016 per_dem_2016 per_gop_2016 diff_2016 per_dem_2012
## 1 2951 0.311 0.628 935 0.427
## 2 20837 0.389 0.560 3574 0.484
## 3 13765 0.150 0.822 9245 0.205
## 4 7837 0.339 0.618 2189 0.389
## 5 12529 0.284 0.660 4710 0.396
## per_gop_2012 diff_2012 winner partywinner16 winner12 partywinner12
## 1 0.549 370 Trump Republican Romney Republican
## 2 0.505 466 Trump Republican Romney Republican
## 3 0.783 7549 Trump Republican Romney Republican
## 4 0.584 1508 Trump Republican Romney Republican
## 5 0.592 2327 Trump Republican Romney Republican
## flipped
## 1 No
## 2 No
## 3 No
## 4 No
## 5 No
glimpse(county_data)
## Rows: 3,195
## Columns: 32
## $ id <chr> "0", "01000", "01001", "01003", "01005", "0100…
## $ name <chr> NA, "1", "Autauga County", "Baldwin County", "…
## $ state <fct> NA, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL…
## $ census_region <fct> NA, South, South, South, South, South, South, …
## $ pop_dens <fct> "[ 50, 100)", "[ 50, 100)", "[ 50, 10…
## $ pop_dens4 <fct> "[ 45, 118)", "[ 45, 118)", "[ 45, 118)", "…
## $ pop_dens6 <fct> "[ 82, 215)", "[ 82, 215)", "[ 82, 215)", "…
## $ pct_black <fct> "[10.0,15.0)", "[25.0,50.0)", "[15.0,25.0)", "…
## $ pop <int> 318857056, 4849377, 55395, 200111, 26887, 2250…
## $ female <dbl> 50.8, 51.5, 51.5, 51.2, 46.5, 46.0, 50.6, 45.2…
## $ white <dbl> 77.7, 69.8, 78.1, 87.3, 50.2, 76.3, 96.0, 27.2…
## $ black <dbl> 13.2, 26.6, 18.4, 9.5, 47.6, 22.1, 1.8, 69.9, …
## $ travel_time <dbl> 25.5, 24.2, 26.2, 25.9, 24.6, 27.6, 33.9, 26.9…
## $ land_area <dbl> 3531905, 50645, 594, 1590, 885, 623, 645, 623,…
## $ hh_income <int> 53046, 43253, 53682, 50221, 32911, 36447, 4414…
## $ su_gun4 <fct> NA, NA, "[11,54]", "[11,54]", "[ 5, 8)", "[11,…
## $ su_gun6 <fct> NA, NA, "[10,12)", "[10,12)", "[ 7, 8)", "[10,…
## $ fips <dbl> 0, 1000, 1001, 1003, 1005, 1007, 1009, 1011, 1…
## $ votes_dem_2016 <int> NA, NA, 5908, 18409, 4848, 1874, 2150, 3530, 3…
## $ votes_gop_2016 <int> NA, NA, 18110, 72780, 5431, 6733, 22808, 1139,…
## $ total_votes_2016 <int> NA, NA, 24661, 94090, 10390, 8748, 25384, 4701…
## $ per_dem_2016 <dbl> NA, NA, 0.2396, 0.1957, 0.4666, 0.2142, 0.0847…
## $ per_gop_2016 <dbl> NA, NA, 0.734, 0.774, 0.523, 0.770, 0.899, 0.2…
## $ diff_2016 <int> NA, NA, 12202, 54371, 583, 4859, 20658, 2391, …
## $ per_dem_2012 <dbl> NA, NA, 0.266, 0.216, 0.513, 0.262, 0.123, 0.7…
## $ per_gop_2012 <dbl> NA, NA, 0.726, 0.774, 0.483, 0.731, 0.865, 0.2…
## $ diff_2012 <int> NA, NA, 11012, 47443, 334, 3931, 17780, 2808, …
## $ winner <chr> NA, NA, "Trump", "Trump", "Trump", "Trump", "T…
## $ partywinner16 <chr> NA, NA, "Republican", "Republican", "Republica…
## $ winner12 <chr> NA, NA, "Romney", "Romney", "Obama", "Romney",…
## $ partywinner12 <chr> NA, NA, "Republican", "Republican", "Democrat"…
## $ flipped <chr> NA, NA, "No", "No", "Yes", "No", "No", "No", "…
# we have data on 3195 FIPS....
glimpse(county_map)
## Rows: 191,382
## Columns: 7
## $ long <dbl> 1225889, 1235324, 1244873, 1244129, 1272010, 1276797, 127…
## $ lat <dbl> -1275020, -1274008, -1272331, -1267515, -1262889, -129551…
## $ order <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17…
## $ hole <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, F…
## $ piece <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ group <fct> 0500000US01001.1, 0500000US01001.1, 0500000US01001.1, 050…
## $ id <chr> "01001", "01001", "01001", "01001", "01001", "01001", "01…
# ... but to create a map, we translate these 3195 counties to 191,382 polygons!
We have three files
nyt_mask_survey, our NYT survey data,county_map that has all polygons that define a countycounty_data with demographics/election data.county_full <- left_join(county_map, county_data, by = "id")
county_masks_full <- left_join(county_full, nyt_mask_survey,
by = c("id"="countyfp"))
p <- ggplot(data = county_masks_full,
mapping = aes(x = long, y = lat,
fill = delta,
group = group))
p1 <- p +
geom_polygon(color = "gray90", size = 0.05) +
coord_equal()
p2 <- p1 +
scale_fill_gradient(low = '#ffffcc', high= '#006837')
p3 <- p1 +
scale_fill_gradient2()
# get different colours from https://colorbrewer2.org/
# the one shown here is https://colorbrewer2.org/#type=diverging&scheme=BrBG&n=6
p4 <- p1 +
scale_fill_gradientn(colours = c('#8c510a','#d8b365','#f6e8c3','#c7eae5','#5ab4ac','#01665e'))
p1
p2
p3
p4
p4 + labs(fill = "Mask acceptance, (Mostly Yes - Mostly No)",
caption = "“Estimates from The New York Times, based on roughly 250,000 interviews \nconducted by Dynata from July 2 to July 14, 2020”") +
guides(fill = guide_legend(nrow = 1)) +
theme_map() +
theme(legend.position = "bottom")
Does mask use acceptance have any relation with some demographics? Let us explor the relationship between country household income, population, and % who voted republican in 2016
county_masks_full %>%
select(hh_income, pop, per_gop_2016, delta) %>%
GGally::ggpairs()+
theme_minimal()